We limit to 1200 because removing the outliers leaves us with a nicely shaped distribution.
H0: The data are fit well by a Poisson distribution. H1: The Poisson distribution does not fit the data well.
The Poisson distribution clearly does not fit the data well: p ≈ 0, so we reject H0. Let’s use a negative binomial (NegBin) model instead, which has a separate dispersion parameter and therefore allows the variance to differ from the mean.
Model backers
# Full negative-binomial (nbinom2) mixed model for the number of backers:
# campaign covariates, cancer type, month and day of week as fixed effects,
# plus a random intercept for year.
formula <- backers ~
  shares_sc + friends_sc + updates_sc + photos_sc + goal_sc +
  text_length_words_sc + duration_float_sc +
  cancer_type + month + day_of_week +
  (1 | year)
mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")

# Candidate simplification: the same model without day_of_week.
formula <- update(formula, ~ . - day_of_week)
new.mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")

# Compare the reduced model with the full model.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + friends_sc + updates_sc + photos_sc + goal_sc + , zi=~0, disp=~1
## new.mod: text_length_words_sc + duration_float_sc + cancer_type + , zi=~0, disp=~1
## new.mod: month + (1 | year), zi=~0, disp=~1
## mod: backers ~ shares_sc + friends_sc + updates_sc + photos_sc + goal_sc + , zi=~0, disp=~1
## mod: text_length_words_sc + duration_float_sc + cancer_type + , zi=~0, disp=~1
## mod: month + day_of_week + (1 | year), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 40 54011 54273 -26966 53931
## mod 46 54020 54320 -26964 53928 3.6141 6 0.7287
# Adopt the reduced model as the new reference.
mod <- new.mod

# Next candidate simplification: remove friends_sc and refit.
formula <- update(formula, ~ . - friends_sc)
new.mod <- glmmTMB(formula, data = dat.b, family = "nbinom2")

# Compare the reduced model with the current reference.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## mod: backers ~ shares_sc + friends_sc + updates_sc + photos_sc + goal_sc + , zi=~0, disp=~1
## mod: text_length_words_sc + duration_float_sc + cancer_type + , zi=~0, disp=~1
## mod: month + (1 | year), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 39 54010 54265 -26966 53932
## mod 40 54011 54273 -26966 53931 0.8242 1 0.364
# Adopt the reduced model as the baseline used by all later comparisons,
# then print its coefficient table.
mod <- new.mod
summary(mod)
## Family: nbinom2 ( log )
## Formula:
## backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc +
## duration_float_sc + cancer_type + month + (1 | year)
## Data: dat.b
##
## AIC BIC logLik deviance df.resid
## 54010.1 54264.8 -26966.0 53932.1 5040
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.004179 0.06464
## Number of obs: 5079, groups: year, 7
##
## Overdispersion parameter for nbinom2 family (): 1.67
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.261461 0.059680 71.41 < 2e-16 ***
## shares_sc 0.774464 0.025946 29.85 < 2e-16 ***
## updates_sc -0.057873 0.013568 -4.27 1.99e-05 ***
## photos_sc 0.123559 0.014915 8.28 < 2e-16 ***
## goal_sc 0.319677 0.018068 17.69 < 2e-16 ***
## text_length_words_sc 0.086437 0.012738 6.79 1.15e-11 ***
## duration_float_sc 0.058702 0.023195 2.53 0.011381 *
## cancer_typebone cancer 0.286356 0.060564 4.73 2.27e-06 ***
## cancer_typebrain cancer 0.319529 0.065559 4.87 1.09e-06 ***
## cancer_typebreast cancer 0.417008 0.053071 7.86 3.92e-15 ***
## cancer_typecervical cancer -0.122597 0.119826 -1.02 0.306250
## cancer_typecolon cancer 0.224359 0.114368 1.96 0.049795 *
## cancer_typeesophageal cancer 0.356066 0.071040 5.01 5.38e-07 ***
## cancer_typegeneral 0.329681 0.051315 6.42 1.32e-10 ***
## cancer_typekidney cancer 0.244011 0.059710 4.09 4.38e-05 ***
## cancer_typeleukemia 0.557847 0.068770 8.11 4.99e-16 ***
## cancer_typeliver cancer -0.278111 0.081504 -3.41 0.000644 ***
## cancer_typelung cancer -0.263824 0.056699 -4.65 3.27e-06 ***
## cancer_typelymphoma 0.370292 0.055296 6.70 2.13e-11 ***
## cancer_typemelanoma -0.020420 0.074660 -0.27 0.784468
## cancer_typemixed 0.095720 0.054039 1.77 0.076506 .
## cancer_typeneuroblastoma 0.466555 0.094777 4.92 8.54e-07 ***
## cancer_typepancreatic cancer 0.468659 0.144838 3.24 0.001213 **
## cancer_typeprostate cancer 0.086021 0.133827 0.64 0.520367
## cancer_typeskin cancer -0.048203 0.068193 -0.71 0.479651
## cancer_typetesticular cancer 0.547275 0.103723 5.28 1.32e-07 ***
## month2 -0.032726 0.047491 -0.69 0.490763
## month3 0.008621 0.057833 0.15 0.881500
## month4 -0.081410 0.059025 -1.38 0.167818
## month5 -0.141690 0.055709 -2.54 0.010978 *
## month6 -0.106167 0.056335 -1.88 0.059489 .
## month7 -0.107387 0.054619 -1.97 0.049286 *
## month8 -0.003657 0.054440 -0.07 0.946439
## month9 -0.077024 0.053898 -1.43 0.152982
## month10 -0.094441 0.052974 -1.78 0.074624 .
## month11 -0.054860 0.053359 -1.03 0.303887
## month12 0.021928 0.053724 0.41 0.683157
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Add metaphors
# Extend the baseline formula with no_metaphor and refit.
formula.temp <- update(formula, ~ . + no_metaphor)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + no_metaphor, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 53976 54237 -26948 53896 36.344 1 1.654e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for no_metaphor (on the log-link scale).
fixef(new.mod)[["cond"]]["no_metaphorTRUE"]
## no_metaphorTRUE
## -0.1386814
# Extend the baseline formula with any_metaphor and refit.
# NOTE(review): any_metaphor looks like the logical complement of no_metaphor
# (same fit statistics, sign-flipped coefficient), so this fit may be
# redundant - confirm.
formula.temp <- update(formula, ~ . + any_metaphor)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + any_metaphor, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 53976 54237 -26948 53896 36.344 1 1.654e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for any_metaphor (on the log-link scale).
fixef(new.mod)[["cond"]]["any_metaphorTRUE"]
## any_metaphorTRUE
## 0.138681
# Extend the baseline formula with dom_journey and refit.
formula.temp <- update(formula, ~ . + dom_journey)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_journey, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 54008 54269 -26964 53928 4.1864 1 0.04075 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for dom_journey (on the log-link scale).
fixef(new.mod)[["cond"]]["dom_journeyTRUE"]
## dom_journeyTRUE
## 0.09338186
# Add journey_prod as a further main effect on top of dom_journey and refit.
formula.temp <- update(formula, ~ . + dom_journey + journey_prod)
new.mod.prod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test journey_prod against `new.mod`, which must still hold the
# dom_journey-only fit from the previous step.
anova(new.mod.prod, new.mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_journey, zi=~0, disp=~1
## new.mod.prod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod.prod: duration_float_sc + cancer_type + month + (1 | year) + dom_journey + , zi=~0, disp=~1
## new.mod.prod: journey_prod, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 40 54008 54269 -26964 53928
## new.mod.prod 41 54010 54278 -26964 53928 0.0129 1 0.9094
# Conditional-model estimate for journey_prod (on the log-link scale).
fixef(new.mod.prod)[["cond"]]["journey_prod"]
## journey_prod
## 0.002608997
# Extend the baseline formula with dom_battle and refit.
formula.temp <- update(formula, ~ . + dom_battle)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_battle, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 53992 54254 -26956 53912 19.61 1 9.496e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for dom_battle (on the log-link scale).
fixef(new.mod)[["cond"]]["dom_battleTRUE"]
## dom_battleTRUE
## 0.09952134
# Add battle_prod as a further main effect (no interaction term) on top of
# dom_battle and refit.
formula.temp <- update(formula, ~ . + dom_battle + battle_prod)
new.mod.prod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test battle_prod against `new.mod`, which must still hold the
# dom_battle-only fit from the previous step.
anova(new.mod.prod, new.mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + dom_battle, zi=~0, disp=~1
## new.mod.prod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod.prod: duration_float_sc + cancer_type + month + (1 | year) + dom_battle + , zi=~0, disp=~1
## new.mod.prod: battle_prod, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 40 53992 54254 -26956 53912
## new.mod.prod 41 53989 54257 -26954 53907 5.0724 1 0.02431 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for battle_prod (on the log-link scale).
fixef(new.mod.prod)[["cond"]]["battle_prod"]
## battle_prod
## 0.0122179
# Extend the baseline formula with only_battle and refit.
formula.temp <- update(formula, ~ . + only_battle)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + only_battle, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 53993 54254 -26956 53913 19.433 1 1.042e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for only_battle (on the log-link scale).
fixef(new.mod)[["cond"]]["only_battleTRUE"]
## only_battleTRUE
## 0.09929021
# Add battle_prod as a further main effect (no interaction term) on top of
# only_battle and refit.
formula.temp <- update(formula, ~ . + only_battle + battle_prod)
new.mod.prod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test battle_prod against `new.mod`, which must still hold the
# only_battle-only fit from the previous step.
anova(new.mod.prod, new.mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + only_battle, zi=~0, disp=~1
## new.mod.prod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod.prod: duration_float_sc + cancer_type + month + (1 | year) + only_battle + , zi=~0, disp=~1
## new.mod.prod: battle_prod, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 40 53993 54254 -26956 53913
## new.mod.prod 41 53987 54255 -26953 53905 7.121 1 0.007618 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for battle_prod (on the log-link scale).
fixef(new.mod.prod)[["cond"]]["battle_prod"]
## battle_prod
## 0.01302743
# Extend the baseline formula with only_journey and refit.
formula.temp <- update(formula, ~ . + only_journey)
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + only_journey, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 54011 54272 -26965 53931 1.2492 1 0.2637
# Conditional-model estimate for only_journey (on the log-link scale).
fixef(new.mod)[["cond"]]["only_journeyTRUE"]
## only_journeyTRUE
## 0.05633693
# Add journey_prod as a further main effect on top of only_journey and refit.
formula.temp <- update(formula, ~ . + only_journey + journey_prod)
new.mod.prod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test journey_prod against `new.mod`, which must still hold the
# only_journey-only fit from the previous step.
anova(new.mod.prod, new.mod, test = "Chisq")
## Data: dat.b
## Models:
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + only_journey, zi=~0, disp=~1
## new.mod.prod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod.prod: duration_float_sc + cancer_type + month + (1 | year) + only_journey + , zi=~0, disp=~1
## new.mod.prod: journey_prod, zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## new.mod 40 54011 54272 -26965 53931
## new.mod.prod 41 54012 54280 -26965 53930 0.9795 1 0.3223
# Conditional-model estimate for journey_prod (on the log-link scale).
fixef(new.mod.prod)[["cond"]]["journey_prod"]
## journey_prod
## 0.0197251
# Extend the baseline formula with battle_salience, standardized inside the
# formula via scale(), and refit.
formula.temp <- update(formula, ~ . + scale(battle_salience))
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + scale(battle_salience), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 53994 54255 -26957 53914 17.983 1 2.229e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for standardized battle_salience
# (log-link scale, per one standard deviation).
fixef(new.mod)[["cond"]]["scale(battle_salience)"]
## scale(battle_salience)
## 0.0482378
# Extend the baseline formula with journey_salience, standardized inside the
# formula via scale(), and refit.
formula.temp <- update(formula, ~ . + scale(journey_salience))
new.mod <- glmmTMB(formula.temp, data = dat.b, family = "nbinom2")

# Test the added term against the baseline model `mod`.
anova(new.mod, mod, test = "Chisq")
## Data: dat.b
## Models:
## mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## mod: duration_float_sc + cancer_type + month + (1 | year), zi=~0, disp=~1
## new.mod: backers ~ shares_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + , zi=~0, disp=~1
## new.mod: duration_float_sc + cancer_type + month + (1 | year) + scale(journey_salience), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod 39 54010 54265 -26966 53932
## new.mod 40 54004 54265 -26962 53924 7.9296 1 0.004863 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional-model estimate for standardized journey_salience
# (log-link scale, per one standard deviation).
fixef(new.mod)[["cond"]]["scale(journey_salience)"]
## scale(journey_salience)
## 0.03118
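Because the model uses a log link, each coefficient β acts multiplicatively on the expected number of backers: holding the other predictors fixed, the percentage change is 100·(exp(β) − 1). On average, not having metaphors present lowers the expected number of backers by 12.9%. When battle metaphors are dominant, the expected number of backers increases by 10.5%. In the model that also includes battle dominance, an additional unit of battle productivity increases the expected number of backers by 1.2%. When there are only battle metaphors present, we see an increase of 10.4%. Because the salience variables are standardized with scale(), their effects are per standard deviation: a one-standard-deviation increase in battle salience corresponds to a 4.9% increase in the expected number of backers, and similarly, a one-standard-deviation increase in journey salience corresponds to a 3.2% increase.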